set seed 1234

local outcomes = "diff_m1_chix_econ std_diff_m1_hdi std_diff_m1_democracy_index diff_m1_general_chix"
local controls = "d_region_decade*"
local bootstrap_nb = 500

** We store CCT RD estimates
use "$project_path/data/4_regdata/regdata", clear
sort Country Year Month Type_Election
foreach outcome in `outcomes' {
	local outcome_short = subinstr("`outcome'", "diff_m1_", "", . ) 
	qui rdrobust `outcome' runvar, all
	local `outcome_short'_cct_est = e(tau_cl) // Conventional local-polynomial RD estimate
	local `outcome_short'_cct_se = e(se_tau_rb) // Robust standard error of the local-polynomial RD estimator
	local `outcome_short'_cct_pv = e(pv_rb) // Robust p-value 
	local `outcome_short'_cct_n0 = e(N_h_l) // Effective number of observations (given by the bandwidth h_l) used to the left of the cutoff
	local `outcome_short'_cct_n1 = e(N_h_r) // Effective number of observations (given by the bandwidth h_r) used to the right of the cutoff
}

** Preparing data for Angrist-Rokkanen procedure

use "$project_path/data/4_regdata/regdata", clear
sort Country Year Month Type_Election
gen decade = Year - mod(Year, 10)
egen region_decade = group(Region decade)
tab region_decade, gen(d_region_decade)
keep if abs(runvar)<=10

keep Country Year Month Type_Election runvar treatment `controls' `outcomes'

tempfile ar_data
save `ar_data'

** Program which computes RD estimates based on the CIA, taken from the replication package of AR (2015), with additional comments

capture: program drop ciest
program ciest, rclass 
	version 11.0
	syntax varlist(fv), outcome(string) method(string) trim(real) //trim is set as a real value 
	
	noisily {
					
		// The first method is a linear reweighting estimator discussed by Kline (2011). 
		if ("`method'" == "kline") {
			
			// It begins by estimating linear models on both sides of the theshold: we estimate how conditional means vary with covariates
			// We estimate at the left of the threshold
			reg `outcome' `varlist' if treatment == 0
			local n0 = e(N) // Number of untreated observations
			predict pred0, xb // Calculate linear prediction
			// And at the right
			reg `outcome' `varlist' if treatment == 1
			local n1 = e(N) // Number of treated observations
			predict pred1, xb // Calculate linear prediction
			
			// For each observation, we can compute a treatment effect
			gen effect = pred1 - pred0
			drop pred1 pred0
			// We store the average predicted treatment effect on the treated
			su effect if treatment == 1 
			local effect_treated = r(mean) 
			drop effect  
						
		}
		
		// The second method is a version of the Hirano, Imbens and Ridder (2003) propensity score estimator.
		else if ("`method'" == "hir") {
		
			// We compute a propensity score, the probability of treatment given covariates
			capture noisily: xi: logit treatment `varlist' if `outcome' != ., iterate(100)
			if (_rc == 0 & `e(converged)' == 1) {
				
				// The propensity score is trimmed
				predict pred_p if `outcome' != .
				replace pred_p = . if (pred_p < `trim' | pred_p > 1 - `trim')
				
				// Number of treated and nontreated observations with measured, non-trimmed propensity scores
				su pred_p if treatment == 0
				local n0 = r(N)
				su pred_p if treatment == 1
				local n1 = r(N)
				
				// Average treatment probability
				su treatment
				local p = r(mean)
				
				// ATT	
				gen temp = `outcome' * (treatment - pred_p) / ((1-pred_p)*`p') 
				su temp
				local effect_treated = r(mean) 
								
				drop pred_p temp 
			}
			else {
				local n0 = .
				local n1 = .
				local effect_treated = .
			}
		}

	}
	return scalar effect_treated = `effect_treated'
	return scalar n0 = `n0'
	return scalar n1 = `n1'
end

** Computing CIA-based estimates for all outcomes

est clear
foreach outcome in `outcomes' {

	foreach method in kline hir {
							
		use `ar_data', clear
		
		// We only keep observations within the window of interest for which we have observations on all controls
		egen temp = rowmiss(`outcome' `controls') // This gives the number of missing values for each observation (row).			
		keep if temp == 0
		drop temp
		
		// Trimming parameters for the propensity score: for the HIR estimator, we trim at the 5th/95th percentile
		if ("`method'" == "hir") {
			local tmethod = "hir"
			local trim = 0.05
		}	
		else {
			local tmethod = "`method'"
			local trim = 0
		}
		
		// We compute the CIA-based estimator
		ciest `controls', outcome(`outcome') method(`tmethod') trim(`trim') // Using the previously defined program which computes RD estimates based on the CIA
		
		// We retrieve the output of the estimation step
		local est = `r(effect_treated)'
		local n0 = `r(n0)'
		local n1 = `r(n1)'
		
		save "$project_path/output/appendix_tables/bstemp.dta", replace
		set seed 123 
		mat bs_est = J(1, 1, .) 
		
		forval b = 1/`bootstrap_nb' {				
			bsample // Bootstrap sample with the same number of observations as the current dataset
			quietly: ciest `controls', outcome(`outcome') method(`tmethod') trim(`trim')
			mat bs_est = (bs_est\ (`r(effect_treated)')) 
			use "$project_path/output/appendix_tables/bstemp.dta", clear
		}
		erase "$project_path/output/appendix_tables/bstemp.dta"
		mat bs_est = bs_est[2..., 1...] 
		mat list bs_est
		
		
		svmat bs_est 
		su bs_est1
		local se = `r(sd)'
				
		//We report estimates of the treatment effect on the treated
		mat b = (`est')
		mat V = (`se'^2)
		
		mat colnames b = est
		mat rownames b = est
		mat colnames V = est
		mat rownames V = est

		eret post b V // Clears all existing e-class results and stores the coefficient vector (b), variance–covariance matrix (V)
		estadd scalar n0  = `n0'
		estadd scalar n1  = `n1'
		
		local outcome_short = subinstr("`outcome'", "diff_m1_", "", . )
		eststo `outcome_short'_`method'
		
		//We also store the main results in locals 
		local `outcome_short'_`method'_est = `est'
		local `outcome_short'_`method'_se = `se'
		local `outcome_short'_`method'_n0 = `n0'
		local `outcome_short'_`method'_n1 = `n1'
		local `outcome_short'_`method'_df = `n1'+`n0'-1
		local `outcome_short'_`method'_pv = 2*ttail(``outcome_short'_`method'_df',abs(``outcome_short'_`method'_est'/``outcome_short'_`method'_se'))

}

}

** Store, compare and output estimates

// We prepare variables to store estimation results
gen outcome = ""
gen outcome_label = ""
gen double cct_est = .
gen double cct_se = .
gen double cct_pv = .
gen cct_n0 = .
gen cct_n1 = .
gen double kline_est = .
gen double kline_se = .
gen double kline_pv = .
gen kline_n0 = .
gen kline_n1 = .
gen double hir_est = .
gen double hir_se = .
gen double hir_pv = .
gen hir_n0 = .
gen hir_n1 = .

// Build results table
local i = 0
foreach outcome in `outcomes' {
	
	if "`outcome'"=="diff_m1_chix_econ" local title = "Economic performance"
	if "`outcome'"=="std_diff_m1_hdi" local title = "Human Development Index"
	if "`outcome'"=="std_diff_m1_democracy_index" local title = "Democracy"
	if "`outcome'"=="diff_m1_general_chix" local title = "General index"

	local i = `i' + 1
	replace outcome = "`outcome'" in `i'
	replace outcome_label = "`title'" in `i'
	
	foreach method in cct kline hir {
		foreach k in est se pv n0 n1 {
			local outcome_short = subinstr("`outcome'", "diff_m1_", "", . )
			replace `method'_`k' = ``outcome_short'_`method'_`k'' in `i'
		}
	}
	
}

keep outcome outcome_label cct* kline* hir* 
drop if outcome==""

// We test the equality of CIA-based estimates with CCT estimates using a Z-test proposed by https://psycnet.apa.org/record/1995-27766-001
gen z_kline_cct = (cct_est-kline_est)/sqrt((cct_se)^2 + (kline_se)^2)
gen z_hir_cct = (cct_est-hir_est)/sqrt((cct_se)^2 + (hir_se)^2)
gen pval_cct_kline = 2*(1-normal(abs(z_kline_cct))) 
gen pval_cct_hir = 2*(1-normal(abs(z_hir_cct))) 
drop z_*

**	We format the results and export a table

// First, we round estimates
local N = _N

foreach v in cct_est cct_se cct_pv kline_est kline_se kline_pv hir_est hir_se hir_pv pval_cct_kline pval_cct_hir {
	gen str_`v' = ""
	forvalues k = 1/`N' {
		local `v'`k' = `v' in `k'
		local str_`v'`k' :  di %5.3f ``v'`k''
		replace str_`v' = "`str_`v'`k''" in `k'
	}
}

foreach method in cct kline hir {

	replace str_`method'_est = str_`method'_est+"*" if `method'_pv<0.1
	replace str_`method'_est = str_`method'_est+"*" if `method'_pv<0.05
	replace str_`method'_est = str_`method'_est+"*" if `method'_pv<0.01
	replace str_`method'_se  = "("+str_`method'_se+")"
	replace str_`method'_pv  = "["+str_`method'_pv+"]"
	replace str_`method'_pv  = "[$<$0.001]" if `method'_pv<0.001
	tostring `method'_n0 `method'_n1, replace force
	gen `method'_n = "N=" + `method'_n0+"/"+`method'_n1
	drop `method'_n0 `method'_n1

}

foreach v in cct_est cct_se cct_pv kline_est kline_se kline_pv hir_est hir_se hir_pv pval_cct_kline pval_cct_hir {
	drop `v'
	ren str_`v' `v'
}

// Then, we reshape the dataset and order lines
gen pval_cct_cct = ""
local var_order = "est se pv n"
foreach method in cct kline hir {
	local i = 0
	foreach v in `var_order' {
		local i = `i'+1
		ren `method'_`v' `method'`i'
	}
	ren pval_cct_`method' `method'5
}

gen order = .
replace order = 1 if outcome=="diff_m1_chix_econ"
replace order = 2 if outcome=="std_diff_m1_hdi"
replace order = 3 if outcome=="std_diff_m1_democracy_index"
replace order = 4 if outcome=="diff_m1_general_chix"
reshape long cct kline hir, i(order outcome_label) j(temp)

// We format the table
sort order temp
gen des = ""
replace outcome_label = "Equality with CCT (p-val.)"   if temp==5
replace outcome_label = "\midrule \textbf{"+outcome_label+"}" if temp==1
replace outcome_label = "" if temp!=1 & temp!=5
keep outcome_label cct kline hir
order outcome_label cct kline hir
lab var outcome_label ""
lab var cct "CCT"
lab var kline "Linear reweighting"
lab var hir "Propensity score"

local note = "\caption*{\footnotesize \emph{Notes}: This table compares our baseline estimates from \cite{cctECMA2014} (in the ''CCT'' column) to CIA-based estimates from \cite{angristRokkanen}: a linear reweighting estimator discussed by \cite{kline2011oaxaca} (in the ''Linear reweighting'' column), and a version of the \cite{hirano2003efficient} propensity score estimator (in the ''Propensity score'' column). Standard errors are reported in parentheses, and p-values are reported in brackets. We also report the number of observations on the left and right of the cutoff within the CCT-optimal bandwidth for the \cite{cctECMA2014} estimates and the number of observations on the left and right of the cutoff in the [-`subframe'pp, `subframe'pp] window for the \cite{kline2011oaxaca} and \cite{hirano2003efficient} estimates. Finally, we test the equality between the CCT and CIA-based estimates using the method of \cite{clogg1995statistical}. $^{*} p<0.10,^{**} p<0.05,^{***} p<0.01$.}"

texsave using "$project_path/output/appendix_tables/Appendix_Table_E5.tex", replace title("Angrist and Rokkanen (2015) CIA-based estimates") varlabels footnote("`note'") frag nofix marker("tab:cia_estimates") align(p{5.5cm} p{4cm} p{4cm} p{4cm}) location(H)
